
/* This program calculates the variables used for some of the descriptive statistics.
Make sure the working directory is set correctly before running: all file paths below are relative. */

// Close any log left open by an earlier run (errors are ignored if none is open)
capture log close
// NOTE(review): legacy numeric form of -set more-; presumably disables output paging -- confirm
set more 1
clear


#delimit cr

// Load the married sample as the master dataset
use "dta\Married_Files\married_sample.dta", clear

// Order the observations by the unique family identifier (the merge key)
sort uniqfm

// Match each family one-to-one with its record in the unmarried calculations file
merge 1:1 uniqfm using "dta\Unmarried_Files\unmarried_sample.dta"


// Write the merged dataset to disk, overwriting any existing copy
save "dta\Penalties\tax_liabilities.dta", replace



// BLS CPI from July of tax year to July 2020.
// The factors are listed in tax-year order, one per year from 1991 through 2018;
// years outside that range keep a missing inflator.
local cpi_ratios ///
	1.90236 1.84414 1.79433 1.74596 1.69902 1.65032 1.61434 ///
	1.58763 1.55430 1.49943 1.45972 1.43865 1.40892 1.36801 ///
	1.32600 1.27322 1.24389 1.17792 1.20316 1.18848 1.14686 ///
	1.13093 1.10918 1.08752 1.08568 1.07677 1.05848 1.02815

gen inflator = .
local yr = 1991
foreach ratio of local cpi_ratios {
	replace inflator = `ratio' if year == `yr'
	local ++yr
}

// Remove couples whose combined inflation-adjusted wages exceed 1,000,000.
// Stata orders missing above every number, so rows where this product is
// missing are removed by the same condition.
drop if ((pwages+swages)*inflator) > 1000000

// Difference in federal tax liabilities (m_Fliability minus un_Fliability),
// scaled by the CPI inflator
gen feddiff1=(m_Fliability-un_Fliability)*inflator

// Apply a de minimis rule: differences strictly between -10 and 10 are set to zero
replace feddiff1=0 if feddiff1>-10 & feddiff1<10

// Penalty for the couple's actual status: keep the sign when marst==1, flip it otherwise.
// The full name feddiff1 is spelled out so the line does not depend on Stata's
// variable-name abbreviation (which fails under -set varabbrev off- or if another
// feddiff* variable exists).
gen feddiff2=feddiff1
replace feddiff2=feddiff2*(-1) if marst!=1

// Difference in state tax liabilities (m_Sliability minus un_Sliability)
// NOTE(review): unlike feddiff1, this is not multiplied by inflator and has no
// de minimis rule -- confirm that this is intended.
gen stddiff1=m_Sliability-un_Sliability

// Same sign convention as feddiff2
gen stddiff2=stddiff1
replace stddiff2=stddiff2*(-1) if marst!=1


// Difference in total liabilities (m_Tliability minus un_Tliability)
// NOTE(review): also not inflation-adjusted -- confirm that this is intended.
gen Tdiff1=m_Tliability-un_Tliability

// Same sign convention as feddiff2
gen Tdiff2=Tdiff1
replace Tdiff2=Tdiff2*(-1) if marst!=1


// Redefine the three race measures as dummies.
// For each definition k = 1, 2, 3 (race1, race2, race3):
//   black`k' = 1 when race`k'==2, 0 when race`k' is 1 or 3
//   oth`k'   = 1 when race`k'==3, 0 when race`k' is 1 or 2
// and both are left missing for any other value of race`k'.
// Per the original notes: race1 requires both partners to be of the same race;
// race2 groups "both black or both part black" and "both white or 3/4 white among
// the two" (likewise for other races); race3 includes other combinations of race.
forvalues k = 1/3 {
	gen black`k' = (race`k'==2) if inlist(race`k', 1, 2, 3)
	gen oth`k' = (race`k'==3) if inlist(race`k', 1, 2, 3)
}


// Redefine Hispanic as two dummies to compare different measures against non-Hispanic.
// hisp1: 1 for a fully Hispanic couple (hisp==1), 0 for every other value of hisp
gen hisp1 = (hisp==1)


// hisp2: 1 when at least one of the two is Hispanic (hisp is 1 or 2), 0 when hisp==0,
// missing for any other value of hisp
gen hisp2 = inlist(hisp, 1, 2) if inlist(hisp, 0, 1, 2)



// Sample window and variable lists
local startyear 1991
local endyear 2018

local penalties feddiff1 Tdiff1
local x1 black1 oth1
local x2 black2 oth2
local x3 black3 oth3


// Marriage indicator: 1 when marst==1, 0 for every other value of marst
gen married = (marst==1)

// Percent married by race, year by year (1995-2018).
// The constant of the weighted regression is the share for the omitted group;
// adding a race coefficient gives the share for that race.
foreach grp in white black other {
	gen `grp'_married = .
}
forvalues yr = 1995/2018 {
	regress married black1 oth1 [pweight=asecfwt] if year==`yr', vce(robust)
	replace white_married = _b[_cons] if year==`yr'
	replace black_married = white_married + _b[black1] if year==`yr'
	replace other_married = white_married + _b[oth1] if year==`yr'
}

// Couple's combined wages, scaled by the CPI inflator
gen real_income=(pwages+swages)*inflator

// Income groups: 0 = exactly zero, 1-8 = successive 20,000-wide bands
// (lower bound exclusive, upper bound inclusive), 9 = above 160,000.
// Negative incomes are left unassigned (missing).
gen inc_group = 0 if real_income==0
forvalues g = 1/8 {
	local lo = (`g' - 1) * 20000
	local hi = `g' * 20000
	replace inc_group = `g' if real_income>`lo' & real_income<=`hi'
}
replace inc_group = 9 if real_income>160000


// Income-split group indicators based on incratio, in quintile-wide bands.
// Each indicator is 1 inside its band and 0 otherwise. Because Stata orders
// missing above every number, a missing incratio lands in split_group5.
gen split_group1 = (incratio<=.2)

gen split_group2 = (incratio>0.2 & incratio<=.4)

gen split_group3 = (incratio>0.4 & incratio<=.6)

gen split_group4 = (incratio>0.6 & incratio<=.8)

gen split_group5 = (incratio>0.8)
 

// Income split based on earnings only.
// lowearn is the smaller and highearn the larger of the two wage variables
// (ties go to pwages for lowearn and to swages for highearn).
gen lowearn=.
replace lowearn=pwages if pwages==swages | pwages<swages
replace lowearn=swages if pwages>swages
gen highearn=. 
replace highearn=pwages if pwages>swages
replace highearn=swages if pwages==swages | pwages<swages
// Ratio of lower to higher earnings; missing when highearn is zero (division by zero)
gen earnrat=lowearn/highearn

// Earnings-split bin indicators (each starts at 0 and is switched to 1 by its condition)
gen spl_earn1=0
// NOTE(review): the two conditions together reduce to earnrat<0.000000000001, so ratios
// in (0, .2] are not assigned to any bin. Confirm whether ">" was intended for the second test.
replace spl_earn1=1 if earnrat<=.2 & earnrat<0.000000000001
gen spl_earn2=0
replace spl_earn2=1 if earnrat>.2 & earnrat<=.4
gen spl_earn3=0
replace spl_earn3=1 if earnrat>.4 & earnrat<=.6
gen spl_earn4=0
replace spl_earn4=1 if earnrat>.6 & earnrat<=.8
gen spl_earn5=0
// NOTE(review): "earnrat>.8 & earnrat==1" reduces to earnrat==1, so ratios in (.8, 1)
// are not assigned to any bin. Confirm whether "earnrat<=1" was intended.
replace spl_earn5=1 if earnrat>.8 & earnrat==1 & lowearn!=0
gen spl_earn6=0
// NOTE(review): this condition can never hold: when lowearn==0, earnrat is 0 or missing,
// never 1, so spl_earn6 stays 0 for every observation. Confirm the intended definition.
replace spl_earn6=1 if earnrat==1 & lowearn==0


// Generate percentages using earnings ratio.
// For every race group, earnings-split bin (1-6), income group (0-9) and year
// (1991-2018), the weighted share of couples in the bin is taken from the constant
// of a constant-only regression of the bin indicator on the matching subsample.
// The result is written to every observation of that year.

foreach grp in black other white {
	// Sample restriction that defines the race group
	if "`grp'" == "black" {
		local who "black1==1"
	}
	else if "`grp'" == "other" {
		local who "oth1==1"
	}
	else {
		local who "black1==0 & oth1==0"
	}

	forvalues b = 1/6 {
		forvalues g = 0/9 {
			gen pct_splt`b'_`grp'_`g' = .
			forvalues yr = 1991/2018 {
				regress spl_earn`b' [pweight=asecfwt] if inc_group==`g' & `who' & year==`yr', vce(robust)
				replace pct_splt`b'_`grp'_`g' = _b[_cons] if year==`yr'
			}
		}
	}
}


// Income split distribution conditional on marital status, 2018 only.
// For white couples first and then black couples: for every earnings-split bin (1-6),
// income group (0-9) and marital status (0/1), the weighted share of couples in the
// bin is the constant of a constant-only regression on the matching subsample,
// written to the 2018 observations.
// The white group previously stopped at bin 5 while the black group (and every
// pct_splt* loop) covered bins 1-6; both groups now cover the same six bins.

foreach grp in white black {
	// Sample restriction that defines the race group
	if "`grp'" == "white" {
		local who "black1==0 & oth1==0"
	}
	else {
		local who "black1==1"
	}

	forvalues i=1/6 {
		forvalues s=0/9 {
			forvalues m=0/1 {
				gen prct_split`i'_`s'_`grp'_`m'=.
				reg spl_earn`i' [pweight=asecfwt] if inc_group==`s' & `who' & married==`m' & year==2018, robust
				replace prct_split`i'_`s'_`grp'_`m'= _b[_cons] if year==2018
			}
		}
	}
}



// Distribution of income by race (black & white only) for 2018.
// First turn inc_group into ten 0/1 indicators, one per income group.
forvalues g = 0/9 {
	gen inc_group`g' = (inc_group==`g')
}


// Weighted share of each income group within race, from constant-only regressions
forvalues g = 0/9 {
	foreach grp in black white {
		gen prct_inc_group_`grp'_`g' = .
	}
	regress inc_group`g' [pweight=asecfwt] if black1==1 & year==2018, vce(robust)
	replace prct_inc_group_black_`g' = _b[_cons] if year==2018
	regress inc_group`g' [pweight=asecfwt] if black1==0 & oth1==0 & year==2018, vce(robust)
	replace prct_inc_group_white_`g' = _b[_cons] if year==2018
}

// The same shares, computed separately by marital status (married = 0 / 1)
forvalues g = 0/9 {
	forvalues wed = 0/1 {
		foreach grp in black white {
			gen prct_inc_group_`grp'_`g'_`wed' = .
		}
		regress inc_group`g' [pweight=asecfwt] if black1==1 & married==`wed' & year==2018, vce(robust)
		replace prct_inc_group_black_`g'_`wed' = _b[_cons] if year==2018
		regress inc_group`g' [pweight=asecfwt] if black1==0 & oth1==0 & married==`wed' & year==2018, vce(robust)
		replace prct_inc_group_white_`g'_`wed' = _b[_cons] if year==2018
	}
}

	
// Percent of two-earner couples within each group, conditional on having earnings.
// twoearn1 = 1 when both wages are positive, 0 when exactly one is positive and the
// other is zero, and missing otherwise.
gen twoearn1 = 1 if pwages>0 & swages>0
replace twoearn1 = 0 if (pwages>0 & swages==0) | (pwages==0 & swages>0)

local penalties feddiff1 Tdiff1
local x1 black1 oth1
local x2 black2 oth2
local x3 black3 oth3

// For each race definition d = 1, 2, 3: regress twoearn1 on the black/other dummies.
// The constant is the share for the omitted group (wtearn), the coefficients are the
// black and other gaps (diffearn_wb / diffearn_wo), and constant + gap gives the
// black and other shares (blkearn / othearn).
forvalues d = 1/3 {
	foreach stub in wtearn blkearn othearn diffearn_wb diffearn_wo {
		gen `stub'_`d' = .
	}

	// 1991-1994: one regression per year on the whole sample
	forvalues yr = 1991/1994 {
		local samp "year==`yr'"
		regress twoearn1 `x`d'' [pweight=asecfwt] if `samp', vce(robust)
		replace wtearn_`d' = _b[_cons] if `samp'
		replace diffearn_wb_`d' = _b[black`d'] if `samp'
		replace diffearn_wo_`d' = _b[oth`d'] if `samp'
		replace othearn_`d' = wtearn_`d' + diffearn_wo_`d' if `samp'
		replace blkearn_`d' = wtearn_`d' + diffearn_wb_`d' if `samp'
	}

	// 1995-2018: one regression per year and marital status (married = 0 / 1)
	forvalues wed = 0/1 {
		forvalues yr = 1995/2018 {
			local samp "year==`yr' & married==`wed'"
			regress twoearn1 `x`d'' [pweight=asecfwt] if `samp', vce(robust)
			replace wtearn_`d' = _b[_cons] if `samp'
			replace diffearn_wb_`d' = _b[black`d'] if `samp'
			replace diffearn_wo_`d' = _b[oth`d'] if `samp'
			replace othearn_`d' = wtearn_`d' + diffearn_wo_`d' if `samp'
			replace blkearn_`d' = wtearn_`d' + diffearn_wb_`d' if `samp'
		}
	}
}

// closeearn = 1 when the earnings ratio lies in [0.75, 1] and twoearn1 is defined,
// 0 when the ratio lies in [0, 0.75), and missing otherwise.
// NOTE(review): couples with earnrat==0 (one earner) also receive closeearn=0, so the
// shares below are not restricted to dual earners -- confirm that this is intended.
gen closeearn = 1 if earnrat>=0.75 & earnrat<=1 & twoearn1!=.
replace closeearn = 0 if earnrat>=0 & earnrat<0.75

// Share of couples with somewhat similar incomes, by race definition d = 1, 2, 3.
// The constant is the share for the omitted group (wtincrat), the coefficients are the
// black and other gaps (diffincrat_wb / diffincrat_wo), and constant + gap gives the
// black and other shares (blkincrat / othincrat).
forvalues d = 1/3 {
	foreach stub in wtincrat blkincrat othincrat diffincrat_wb diffincrat_wo {
		gen `stub'_`d' = .
	}

	// 1991-1994: one regression per year on the whole sample
	forvalues yr = 1991/1994 {
		local samp "year==`yr'"
		regress closeearn `x`d'' [pweight=asecfwt] if `samp', vce(robust)
		replace wtincrat_`d' = _b[_cons] if `samp'
		replace diffincrat_wb_`d' = _b[black`d'] if `samp'
		replace diffincrat_wo_`d' = _b[oth`d'] if `samp'
		replace othincrat_`d' = wtincrat_`d' + diffincrat_wo_`d' if `samp'
		replace blkincrat_`d' = wtincrat_`d' + diffincrat_wb_`d' if `samp'
	}

	// 1995-2018: one regression per year and marital status (married = 0 / 1)
	forvalues wed = 0/1 {
		forvalues yr = 1995/2018 {
			local samp "year==`yr' & married==`wed'"
			regress closeearn `x`d'' [pweight=asecfwt] if `samp', vce(robust)
			replace wtincrat_`d' = _b[_cons] if `samp'
			replace diffincrat_wb_`d' = _b[black`d'] if `samp'
			replace diffincrat_wo_`d' = _b[oth`d'] if `samp'
			replace othincrat_`d' = wtincrat_`d' + diffincrat_wo_`d' if `samp'
			replace blkincrat_`d' = wtincrat_`d' + diffincrat_wb_`d' if `samp'
		}
	}
}

// Black and Hispanic percent statistics.
// Weighted share of black and other-race couples under each race definition d = 1, 2, 3,
// taken from the constant of a constant-only regression of the dummy.

forvalues d = 1/3 {
	foreach grp in black oth {
		gen pct_`grp'`d' = .
	}

	// 1991-1994: one share per year
	forvalues yr = 1991/1994 {
		foreach grp in black oth {
			regress `grp'`d' [pweight=asecfwt] if year==`yr', vce(robust)
			replace pct_`grp'`d' = _b[_cons] if year==`yr'
		}
	}

	// 1995-2018: one share per year and marital status (married = 0 / 1)
	forvalues wed = 0/1 {
		forvalues yr = 1995/2018 {
			foreach grp in black oth {
				regress `grp'`d' [pweight=asecfwt] if year==`yr' & married==`wed', vce(robust)
				replace pct_`grp'`d' = _b[_cons] if year==`yr' & married==`wed'
			}
		}
	}
}



// Weighted share of Hispanic couples under each Hispanic definition h = 1, 2,
// taken from the constant of a constant-only regression of the dummy.
forvalues h = 1/2 {
	gen pct_hsp`h' = .

	// 1991-1994: one share per year
	forvalues yr = 1991/1994 {
		regress hisp`h' [pweight=asecfwt] if year==`yr', vce(robust)
		replace pct_hsp`h' = _b[_cons] if year==`yr'
	}

	// 1995-2018: one share per year and marital status (married = 0 / 1)
	forvalues wed = 0/1 {
		forvalues yr = 1995/2018 {
			regress hisp`h' [pweight=asecfwt] if year==`yr' & married==`wed', vce(robust)
			replace pct_hsp`h' = _b[_cons] if year==`yr' & married==`wed'
		}
	}
}



// Weighted share of black and other-race couples among Hispanic couples, for each
// Hispanic definition h = 1, 2 and race definition d = 1, 2, 3. The regressions use
// only observations with hisp`h'==1; the result is written to every observation of
// the matching year (and marital status from 1995 on).
forvalues h = 1/2 {
	forvalues d = 1/3 {
		foreach grp in black oth {
			gen pct_`grp'_hsp`h'_`d' = .
		}

		// 1991-1994: one share per year
		forvalues yr = 1991/1994 {
			foreach grp in black oth {
				regress `grp'`d' [pweight=asecfwt] if year==`yr' & hisp`h'==1, vce(robust)
				replace pct_`grp'_hsp`h'_`d' = _b[_cons] if year==`yr'
			}
		}

		// 1995-2018: one share per year and marital status (married = 0 / 1)
		forvalues wed = 0/1 {
			forvalues yr = 1995/2018 {
				foreach grp in black oth {
					regress `grp'`d' [pweight=asecfwt] if year==`yr' & hisp`h'==1 & married==`wed', vce(robust)
					replace pct_`grp'_hsp`h'_`d' = _b[_cons] if year==`yr' & married==`wed'
				}
			}
		}
	}
}
	
	
// Number the observations within each year x marital-status cell.
// These two commands must stay outside the comment block below: obs is required by
// the -keep- that follows, and was previously never created because the commands
// sat inside the block comment.
sort year married
bysort year married: gen obs=_n

/* The following lines call other do-files that only use the variables created above
   to generate graphs; not all of those graphs are used in the paper.
   The replication files include one of the graphing do-files as an example.

do "do\marriage_rates_nm.do"

do "do\two_earner_percents_nm.do"

do "do\two_earner_percents_unmarried_nm.do"

do "do\close_earner_percents_nm.do"

do "do\close_earner_percents_unmarried_nm.do"


*/

// Keep a single row per year x marital-status cell
keep if obs==1

save "dta\ResultsRR\descriptive_groups_nm.dta", replace


